Contents

Hide code cell source
import warnings
warnings.filterwarnings("ignore")
import os
import os.path as op
import sys
import folium

from myst_nb import glue 

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

sys.path.append("../../../../indicators_setup")
from ind_setup.plotting_int import plot_timeseries_interactive
from ind_setup.plotting import plot_bar_probs
from ind_setup.colors import get_df_col
from ind_setup.core import fontsize
from ind_setup.tables import plot_df_table, table_rain_a_summary


sys.path.append("../../../functions")
from data_downloaders import GHCN


from data_downloaders import GHCN, download_oni_index
from ind_setup.plotting_int import plot_oni_index_th
from ind_setup.plotting import plot_bar_probs_ONI, add_oni_cat
import df2img
---------------------------------------------------------------------------
ModuleNotFoundError                       Traceback (most recent call last)
Cell In[1], line 15
     12 import matplotlib.pyplot as plt
     14 sys.path.append("../../../../indicators_setup")
---> 15 from ind_setup.plotting_int import plot_timeseries_interactive
     16 from ind_setup.plotting import plot_bar_probs
     17 from ind_setup.colors import get_df_col

ModuleNotFoundError: No module named 'ind_setup'
country = 'Palau'
vars_interest = ['PRCP']
update_data = False
path_data = "../../../data"
path_figs = "../../../matrix_cc/figures"
Hide code cell source
if update_data:
    df_country = GHCN.get_country_code(country)
    print(f'The GHCN code for {country} is {df_country["Code"].values[0]}')

    df_stations = GHCN.download_stations_info()
    df_country_stations = df_stations[df_stations['ID'].str.startswith(df_country.Code.values[0])]
    print(f'There are {df_country_stations.shape[0]} stations in {country}')
Hide code cell source
if update_data:
    GHCND_dir = 'https://www.ncei.noaa.gov/data/global-historical-climatology-network-daily/access/'
    id = 'PSW00040309' # Koror Station
    dict_prcp = GHCN.extract_dict_data_var(GHCND_dir, 'PRCP', df_country_stations.loc[df_country_stations['ID'] == id])[0]
    data = dict_prcp[0]['data']#.dropna()
    data.to_pickle(op.join(path_data, 'GHCN_precipitation.pkl'))
else:
    data = pd.read_pickle(op.join(path_data, 'GHCN_precipitation.pkl'))
dict_prcp = [{'data' : data, 'var' : 'PRCP', 'ax' : 1, 'label' : 'Precipitation'}, ]
data = dict_prcp[0]['data']#.dropna()
data = data.groupby(data.index.year).filter(lambda x: len(x) >= 300).dropna()

glue("n_years", len(np.unique(data.index.year)), display=False)
data['wet_day'] = np.where(data['PRCP'] > 1, 1, np.where((np.isnan(data['PRCP'])==True), np.nan, 0))
fig, ax = plot_bar_probs(x = [0, 1], y = data.groupby('wet_day').count()['PRCP'].values, labels = ['Dry Days', 'Wet Days'])
ax.set_title('Distribution of Wet Days', fontsize = fontsize)
ax.set_ylabel('Number of Days', fontsize = fontsize)    
Text(0, 0.5, 'Number of Days')
../../../../_images/322935c12f4642fa1ef04c4d4e15072ec93f7e2ec44acbd7b74907f19938b2b5.png
# Correct accumulated precipitation with number of observations per year to make fair comparisons and trends
datag = (data.groupby(data.index.year).sum()/ data.groupby(data.index.year).count()) * 365
datag.index = pd.to_datetime(datag.index, format = '%Y')
dict_plot = [{'data' : datag, 'var' : 'PRCP', 'ax' : 1, 'label':'Accumulated precipitation [mm]'},]
fig, ax = plot_bar_probs(x = datag.index.year, y = datag['PRCP'].values,
                    trendline = True, figsize = [15, 4])
ax.set_ylabel('Accumulated Rainfall [mm]', fontsize = fontsize)
Text(0, 0.5, 'Accumulated Rainfall [mm]')
../../../../_images/b489fd13d2c91699b858f2686dd8f64833bec78d92f5468e46380885a5f37a9a.png
threshold = 1 #np.percentile(data['PRCP'].dropna(), 90)
data['wet_day_t'] = np.where(data['PRCP'] > threshold, 1, np.where((np.isnan(data['PRCP'])==True), np.nan, 0))
data_th = data.groupby([data.index.year, data.wet_day_t]).count()['PRCP']
data_th = data_th/data.groupby(data.index.year).count()['PRCP'] * 365
fig, ax = plt.subplots(figsize = [15, 5])
data_th.unstack().plot(kind = 'bar', stacked = True, ax = ax, color = get_df_col()[:2], edgecolor = 'white', alpha = .5)
ax.set_ylabel('Number of days', fontsize = fontsize)
Text(0, 0.5, 'Number of days')
../../../../_images/f02cf19b642d47b1a020d2ef1afae93500892c31036f99ffaae3c649a26f598d.png
#Wet days
data2 = data.loc[data['wet_day_t'] == 1]
data2 = data2.groupby(data2.index.year).count()
fig, ax, trend = plot_bar_probs(x = data2.index, y = data2.PRCP.values, trendline = True,
               y_label = 'Number of wet days [>1mm]', figsize = [15, 4], return_trend = True)
plt.savefig(op.join(path_figs, 'F7a_Wet_days_1mm.png'), dpi=300, bbox_inches='tight')
glue("trend_wet", float(trend), display=False)
../../../../_images/005b5d6b16d48545ec07512985ea8457bbdbecfddabd45a0681d48bb4b2a3b19.png
#Dry days
data2 = data.loc[data['wet_day_t'] == 0]
data2 = data2.groupby(data2.index.year).count()
fig, ax, trend = plot_bar_probs(x = data2.index, y = data2.PRCP.values, trendline = True,
               y_label = 'Number of dry days [<1mm]', figsize = [15, 4], return_trend = True)
plt.savefig(op.join(path_figs, 'F6a_Number_dry.png'), dpi=300, bbox_inches='tight')

glue('trend_dry', float(trend), display=False)
../../../../_images/0b3769a867939a144e458bb14738f910164d2037edc4ffb7466db5736170281c.png
prcentile = 95
threshold = np.round(np.percentile(data['PRCP'].dropna(), prcentile), 2)
print(f'Threshold of {threshold}mm')
data['wet_day_t'] = np.where(data['PRCP'] > threshold, 1, np.where((np.isnan(data['PRCP'])==True), np.nan, 0))
Threshold of 45.7mm
data_2 = data.loc[data['wet_day_t'] == 1][['PRCP']]
data_over_th = data_2.groupby(data_2.index.year).count()
data_over_th.index = pd.to_datetime(data_over_th.index, format = '%Y')
data_over_th['PRCP_below'] = 365 - data_over_th['PRCP'].values
dict_plot = [{'data' : data_over_th, 'var' : 'PRCP', 'ax' : 1, 'label':f'Number of days over threshold: {threshold}mm'},]
fig = plot_bar_probs(x = data_over_th.index.year, y = data_over_th['PRCP'].values, trendline = True,
               figsize = (15, 5), y_label = f'Number of days over {prcentile} percentile')

plt.savefig(op.join(path_figs, 'F7b_Wet_days_95p.png'), dpi=300, bbox_inches='tight')
../../../../_images/762a27bdb1c3eba5c3768608966613652143d00a6477c9f88606e78b9b4b1bd6.png
data_max = data.groupby(data.index.year).max()
data_max.index = pd.to_datetime(data_max.index, format = '%Y')

dict_plot = [{'data' : data_max, 'var' : 'PRCP', 'ax' : 1, 'label':'Annual daily maxima'},]
plot_timeseries_interactive(dict_plot, trendline = True, figsize = (25, 12), label_yaxes = 'Precipitation [mm]');
df = table_rain_a_summary(data)
fig = plot_df_table(df.T, figsize = (600, 350),)
../../../../_images/7af62fc9e178ee4882c20741d27bc8d73c962e9c3d5a9632064f6fed8a0e2d70.png
p_data = 'https://psl.noaa.gov/data/correlation/oni.data'
if update_data:
    df1 = download_oni_index(p_data)
    df1.to_pickle(op.join(path_data, 'oni_index.pkl'))
else:
    df1 = pd.read_pickle(op.join(path_data, 'oni_index.pkl'))
lims = [-.5, .5]
plot_oni_index_th(df1, lims = lims)
st_data = data
st_data_monthly = st_data.resample('M').mean()
st_data_monthly.index = pd.DatetimeIndex(st_data_monthly.index).to_period('M').to_timestamp() + pd.offsets.MonthBegin(1)
df1['prcp'] = st_data_monthly['PRCP']#.rolling(window=rolling_mean).mean()
df1 = add_oni_cat(df1, lims = lims)
df2 = df1.resample('Y').mean()
df2['prcp_ref'] = df2.prcp - df2.loc['1961':'1990'].prcp.mean()
fig = plot_bar_probs_ONI(df2, var='prcp_ref', y_label = 'Precipitation [mm]')
fig.suptitle('Mean Annual Precipitation Anomaly over the 1961-1990 mean', fontsize = fontsize, y = 1.05)
plt.savefig(op.join(path_figs, 'F5_Rain_mean.png'), dpi=300, bbox_inches='tight')
../../../../_images/58f369e11efacd7a34b05a2b91a706a364920cd0b1fd7259536134c744074ef5.png
df_format = np.round(df1.describe(), 2)
fig = plot_df_table(df_format, figsize = (400, 300))
df2img.save_dataframe(fig=fig, filename="getting_started.png")
../../../../_images/4500dcc3a64542db537d3d61cf42acafdeaf1f5954e591f8d5d50087075d861a.png